1 Loading in Packages + Dataset

## 
## The downloaded binary packages are in
##  /var/folders/7n/x74qctp91rng390gx0z9hmd80000gn/T//Rtmp2fmjdA/downloaded_packages
load(here("jk_code", "SO4analysis.rds"))

2 Isolating control and SO4 samples

DimPlot(SO4,split.by = "treatment",group.by = "subclass_MD" )

Idents <- "treatment"
DimPlot(SO4)

Idents(SO4) <- "sample"

DimPlot(SO4,split.by = "sample")

control <- subset(SO4, idents = c("SO1","SO4"))

low_salt <- subset(SO4, idents = c("SO2","SO3"))

DimPlot(SO4)

3 Finding Markers that define each Control Sample

control <- SCTransform(control) %>%
    RunPCA() %>%
    FindNeighbors(dims = 1:30) %>%
    FindClusters() %>%
    RunUMAP(dims = 1:30)
## Running SCTransform on assay: RNA
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## vst.flavor='v2' set. Using model with fixed slope and excluding poisson genes.
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 21397 by 5010
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 5000 cells
## Found 126 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 21397 genes
## Computing corrected count matrix for 21397 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 11.95562 secs
## Determine variable features
## Centering data matrix
## Place corrected count matrix in counts slot
## Warning: The `slot` argument of `SetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Different cells and/or features from existing assay SCT
## Set default assay to SCT
## PC_ 1 
## Positive:  Pappa2, Malat1, S100g, Zfand5, Pde10a, Ramp3, Robo2, Sdc4, Itga4, Gm37376 
##     Aard, Nadk2, Nos1, Ranbp3l, Wwc2, Sgms2, Neat1, Irx1, Col4a3, Col4a4 
##     Tmem158, Aebp1, Cdkn1c, Itprid2, Cacna1d, Ptgs2, Ccdc80, Bmp3, Camk2d, Uroc1 
## Negative:  Umod, Egf, Tmem52b, Fabp3, Klk1, Sostdc1, Sult1d1, Prdx5, Wfdc15b, Ly6a 
##     Foxq1, Cox6c, Krt7, Ckb, Cldn19, Cox5b, Slc25a5, Wfdc2, Atp5g1, Atp1a1 
##     Ggt1, Cox4i1, Atp1b1, Gm47708, Gadd45g, Cox7b, Ndufa4, Cox8a, Atp5b, Gm44120 
## PC_ 2 
## Positive:  Ubb, Mt1, Fth1, Gm22133, Hspa1a, Ldhb, Actb, Hspa1b, H3f3b, Fos 
##     Junb, Ftl1, Eif1, Jund, Jun, Cd63, Fxyd2, Prdx1, Mt2, Tmem213 
##     Mpc2, Cd9, Mgst1, Rps24, Rpl37, Ier2, Cox8a, Btg2, Gm8355, Egr1 
## Negative:  Mir6236, Gm37376, CT010467.1, Egf, Malat1, Umod, Gm24447, Nme7, mt-Nd5, mt-Rnr1 
##     Slc12a1, mt-Nd6, Kcnq1ot1, Neat1, Tmem52b, Lars2, Atp1b1, mt-Nd4l, Dst, Etnk1 
##     Wnk1, mt-Rnr2, Atrx, Slc5a3, Syne2, Robo2, mt-Co1, mt-Nd2, Zbtb20, Pnisr 
## PC_ 3 
## Positive:  Gm22133, Ldhb, Mgst1, Car15, Mpc2, Cd63, Tmem213, Fxyd2, Cd9, Cldn10 
##     Ftl1, Rpl9, Prdx1, Rplp1, Wfdc2, Rpl7, S100a1, Fth1, Ramp3, Rps27a 
##     Rps24, Ly6a, Irx1, Ppp1r1a, Atp5e, Apoe, Mdh1, Cox8a, Tmbim6, Rpl37 
## Negative:  Fos, Jun, Junb, Egr1, Hspa1b, Btg2, Hspa1a, Atf3, Zfp36, Klf6 
##     Ier2, Fosb, Socs3, Klf2, Jund, Dnajb1, Gadd45g, Tsc22d1, Gm37376, Rhob 
##     Malat1, Gadd45b, Actb, H1f2, Ubc, H2bc4, H3f3b, Egf, Nr4a1, Dusp1 
## PC_ 4 
## Positive:  S100g, Gm8420, Actb, Sdc4, Pth1r, Igfbp7, Gm8885, Pappa2, Malat1, Umod 
##     Ramp3, Tmem52b, Egf, Cebpd, Cd9, Tmsb10, Sgms2, Wnk1, Gm28037, Rpl41 
##     Ppm1h, Tmem158, Ccdc80, Lmna, Tbxas1, Pdcd4, Ppp1r1a, Tmbim6, Col4a3, Gm11808 
## Negative:  Mt1, Mt2, Rpl15-ps2, Rpl13, Ptger3, CT010467.1, Rpl10, Rpl9, Gm22133, mt-Rnr2 
##     Apoe, Rpl24, Aebp1, Rpl17, Jun, Rpsa, Ckb, Hspa8, Eef1a1, Rpl18 
##     Cd63, Gm13340, Gpx6, Gm5905, Uba52-ps, Gm23935, Gpx4, Rplp1, Mfsd4a, Chchd10 
## PC_ 5 
## Positive:  Pappa2, Tmem52b, Umod, Aard, Egf, Sult1d1, Foxq1, Actb, Tmem158, Gm44120 
##     Mcub, Pde10a, Rpl15-ps2, Defb42, Tmsb4x, Dctd, Car15, Rpl10, Kcnj10, 5330417C22Rik 
##     Begain, Rpl13, Rpl24, Abca13, Gsn, Zfand5, Eef1a1, Fabp3, 1700028P14Rik, Gm22133 
## Negative:  mt-Nd5, mt-Co1, mt-Nd4l, Gm28437, mt-Cytb, Malat1, Gm28438, mt-Rnr1, Kng1, Nme7 
##     mt-Nd2, Mir6236, Gm8420, Hspa1b, Gm28037, Gm10925, Defb1, Rps21, Atp1b1, Fxyd2 
##     Klk1, Gm11808, Gm8885, Tnfaip2, Cox6a1, Gm12338, S100g, Sostdc1, mt-Nd3, Car12
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5010
## Number of edges: 184583
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7657
## Number of communities: 10
## Elapsed time: 0 seconds
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 20:09:20 UMAP embedding parameters a = 0.9922 b = 1.112
## 20:09:20 Read 5010 rows and found 30 numeric columns
## 20:09:20 Using Annoy for neighbor search, n_neighbors = 30
## 20:09:20 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 20:09:20 Writing NN index file to temp file /var/folders/7n/x74qctp91rng390gx0z9hmd80000gn/T//Rtmp2fmjdA/file348639bf4bba
## 20:09:20 Searching Annoy index using 1 thread, search_k = 3000
## 20:09:20 Annoy recall = 100%
## 20:09:21 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 20:09:21 Initializing from normalized Laplacian + noise (using RSpectra)
## 20:09:21 Commencing optimization for 500 epochs, with 212904 positive edges
## 20:09:21 Using rng type: pcg
## 20:09:24 Optimization finished
DimPlot(control)

DimPlot(control,split.by = "sample")

Idents(control) <- "sample"
control.markers <- FindAllMarkers(control, only.pos = TRUE)
## Calculating cluster SO1
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the presto package
## --------------------------------------------
## install.packages('devtools')
## devtools::install_github('immunogenomics/presto')
## --------------------------------------------
## After installation of presto, Seurat will automatically use the more 
## efficient implementation (no further action necessary).
## This message will be shown once per session
## Calculating cluster SO4
control.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1)
control.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1) %>%
    slice_head(n = 10) %>%
    ungroup() -> top10
DoHeatmap(control, features = top10$gene) + NoLegend()
## Warning in DoHeatmap(control, features = top10$gene): The following features
## were omitted as they were not found in the scale.data slot for the SCT assay:
## Ldhb-ps, Gm8292, Rps6

4 Finding Markers that define each low salt Sample

low_salt <- SCTransform(low_salt) %>%
    RunPCA() %>%
    FindNeighbors(dims = 1:30) %>%
    FindClusters() %>%
    RunUMAP(dims = 1:30)
## Running SCTransform on assay: RNA
## vst.flavor='v2' set. Using model with fixed slope and excluding poisson genes.
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 22041 by 6421
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 5000 cells
## There are 2 estimated thetas smaller than 1e-07 - will be set to 1e-07
## Found 187 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 22041 genes
## Computing corrected count matrix for 22041 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 13.71506 secs
## Determine variable features
## Centering data matrix
## Place corrected count matrix in counts slot
## Warning: Different cells and/or features from existing assay SCT
## Set default assay to SCT
## PC_ 1 
## Positive:  Pappa2, Malat1, Gm37376, Mir6236, Neat1, Zfand5, Robo2, Itga4, Wwc2, Col4a4 
##     Aard, Kcnq1ot1, Etnk1, Pde10a, Nadk2, Nos1, Col4a3, Srrm2, Gm24447, Ptgs2 
##     Hnrnpa2b1, Itprid2, Zbtb20, Sgms2, Acsl4, Peg3, Prpf4b, Pnisr, Bmp3, Ddx5 
## Negative:  Umod, Egf, Tmem52b, Sult1d1, Wfdc2, Sostdc1, Foxq1, Fabp3, Prdx5, Krt7 
##     Klk1, Cox6c, Ly6a, Slc25a5, Ckb, Cldn19, Ggt1, Atp1a1, Cox4i1, Atp5g1 
##     Wfdc15b, Cox5b, Cox8a, Ndufa4, Atp5b, Gadd45g, Gm47708, Chchd10, Pcolce, Gm44120 
## PC_ 2 
## Positive:  Egf, Mir6236, Gm37376, Malat1, Umod, Tmem52b, CT010467.1, Nme7, Neat1, Sult1d1 
##     mt-Nd5, mt-Nd4l, Foxq1, mt-Rnr1, mt-Nd6, Gm24447, Kcnq1ot1, Atp1b1, Slc12a1, Lars2 
##     Slc5a3, Gm28438, Dst, Gm23935, Wnk1, Sostdc1, mt-Co1, mt-Nd2, Fabp3, Etnk1 
## Negative:  Gm22133, Fth1, Ubb, Ftl1, Ldhb, Mt1, Prdx1, Eif1, Cd63, Cd9 
##     Car15, Mgst1, Mpc2, Aard, Ramp3, Clu, Fxyd2, Bsg, H3f3b, Rps24 
##     Tmem213, Rps5, Rpl7, Itm2b, Rpl9, Rps27a, Mcub, H2-D1, Lgals3, Rps7 
## PC_ 3 
## Positive:  Pappa2, Egf, Aard, Mir6236, Sfrp1, Umod, Hsp90b1, mt-Nd5, Gm13340, Slc12a1 
##     mt-Nd2, Ly6a, Atp1b1, Wnk1, Sec14l1, mt-Nd4, Wwc2, Robo2, Car15, Etnk1 
##     Tmem158, mt-Nd4l, Gm29216, CT010467.1, mt-Cytb, Mcub, Pde10a, Iyd, Gm28661, Cd9 
## Negative:  Fos, Junb, Jun, Hspa1b, Hspa1a, Btg2, Zfp36, Egr1, Ier2, Klf6 
##     Atf3, Socs3, Fosb, Klf2, Jund, Dusp1, Rhob, Dnajb1, Cebpd, Ubc 
##     Gadd45g, Tsc22d1, Gadd45b, H3f3b, Mt1, Mt2, Gm42793, Bhlhe40, Klf4, H1f2 
## PC_ 4 
## Positive:  mt-Rnr1, mt-Rnr2, Gm8420, Hspa1b, Mt1, Gm8885, CT010467.1, mt-Nd4l, Gm23935, mt-Co1 
##     Hspa1a, Gm28437, Apoe, mt-Nd5, Gm10925, mt-Cytb, Gm28438, Gm24447, Gpx6, Aebp1 
##     mt-Nd4, Gm28037, Gm28661, Mt2, Fgf9, Tmem213, Car4, mt-Nd2, Defb1, Egfl6 
## Negative:  Pappa2, Aard, Tmem52b, Umod, Egf, Sult1d1, Foxq1, Tmem158, Hsp90b1, Ptgs2 
##     Mcub, Ptprz1, Wwc2, Dctd, Tmsb4x, Iyd, S100g, Ramp3, H3f3b, Gm44120 
##     Calr, Tsc22d1, 5330417C22Rik, Defb42, Cd9, 1700028P14Rik, Igfbp7, Tagln2, Wnk1, Lcn2 
## PC_ 5 
## Positive:  Gm8420, Mir6236, Mt1, Tmem52b, CT010467.1, Gm23935, Gm8885, Mt2, Aard, Foxq1 
##     Sult1d1, Gm24447, Gm8355, Pappa2, Rpl37, mt-Rnr1, Rpl41, Tmsb4x, Rpl37a, Gm11808 
##     Lars2, Atp5e, Atp5md, Gm28037, Zfp503, Cox6c, Gm44120, Gabarapl1, Gm15564, mt-Rnr2 
## Negative:  Umod, Hspa1b, Hspa1a, Gm13340, mt-Nd5, Gm29216, Klk1, mt-Co1, Atp1b1, mt-Nd4 
##     Ly6a, Sfrp1, Gm28661, mt-Cytb, Rpl15-ps2, Kng2, Gm28437, Iyd, mt-Nd2, Gm10925 
##     Egf, Sdc4, Fth1, Itm2b, Pth1r, Id2, Cldn10, Malat1, Wnk1, Ier3
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6421
## Number of edges: 222440
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7509
## Number of communities: 12
## Elapsed time: 0 seconds
## 20:11:01 UMAP embedding parameters a = 0.9922 b = 1.112
## 20:11:01 Read 6421 rows and found 30 numeric columns
## 20:11:01 Using Annoy for neighbor search, n_neighbors = 30
## 20:11:01 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 20:11:01 Writing NN index file to temp file /var/folders/7n/x74qctp91rng390gx0z9hmd80000gn/T//Rtmp2fmjdA/file34867a07eef
## 20:11:01 Searching Annoy index using 1 thread, search_k = 3000
## 20:11:02 Annoy recall = 100%
## 20:11:03 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 20:11:03 Initializing from normalized Laplacian + noise (using RSpectra)
## 20:11:03 Commencing optimization for 500 epochs, with 283086 positive edges
## 20:11:03 Using rng type: pcg
## 20:11:06 Optimization finished
DimPlot(low_salt)

DimPlot(low_salt,split.by = "sample")

Idents(low_salt) <- "sample"
low_salt.markers <- FindAllMarkers(control, only.pos = TRUE)
## Calculating cluster SO1
## Calculating cluster SO4
low_salt.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1)
low_salt.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1) %>%
    slice_head(n = 10) %>%
    ungroup() -> top10
DoHeatmap(low_salt, features = top10$gene) + NoLegend()
## Warning in DoHeatmap(low_salt, features = top10$gene): The following features
## were omitted as they were not found in the scale.data slot for the SCT assay:
## Ldhb-ps, Gm8292, Gpx4, Uba52-ps, Rps6, Rpl18, Rpl13

5 Visualizing Difference

FeaturePlot(control,"Ptger3")
## Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

FeaturePlot(control, "S100g")

VlnPlot(control,"Hspa8")

VlnPlot(control, "Ptger3")

VlnPlot(control, "S100g")

markers.to.plot1 <- c("Ptger3","Hspa8","S100g")
  

DotPlot(control,
features = markers.to.plot1,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()

control expresses highly in Ptger3, SO4 expresses high in S100g.

S01 Also has a lot more Ribosomal gene influence.

Why?

6 Separating the markers

control_markers <- control.markers[control.markers$cluster == "control", ]
SO4_markers <- control.markers[control.markers$cluster == "SO4", ]

7 View pathway analysis for the genes

7.1 control, Control sample 1

df<- control.markers %>% arrange(desc(avg_log2FC))

df2 <- df %>% filter(p_val_adj < 0.05)

DEG_list <- df2

markers <- DEG_list %>% rownames_to_column(var="SYMBOL")

head(markers, n = 50)
ENTREZ_list <- bitr(
  geneID = rownames(DEG_list),
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = org.Mm.eg.db
)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(geneID = rownames(DEG_list), fromType = "SYMBOL", toType =
## "ENTREZID", : 5.33% of input gene IDs are fail to map...
markers <-  ENTREZ_list %>% inner_join(markers, by = "SYMBOL")

# Removing genes that are not statistically significant. 
markers <-  markers %>% dplyr::filter(p_val_adj < 0.05)
#head(markers, n = 50)

pos.markers <-  markers %>% dplyr::filter(avg_log2FC > 0) %>%  arrange(desc(abs(avg_log2FC)))
#head(pos.markers, n = 50)

pos.ranks <- pos.markers$ENTREZID[abs(pos.markers$avg_log2FC) > 0]
#head(pos.ranks)

pos_go <- enrichGO(gene = pos.ranks,           #a vector of entrez gene id
                   OrgDb = "org.Mm.eg.db",    
                   ont = "BP",
                   readable = TRUE)              #whether mapping gene ID to gene Name

pos_go
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:2043] "667014" "100041678" "667937" "20564" "671215" "13653" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...1489 enriched terms found
## 'data.frame':    1489 obs. of  12 variables:
##  $ ID            : chr  "GO:0002181" "GO:0140236" "GO:0140241" "GO:0140242" ...
##  $ Description   : chr  "cytoplasmic translation" "translation at presynapse" "translation at synapse" "translation at postsynapse" ...
##  $ GeneRatio     : chr  "79/1737" "37/1737" "37/1737" "37/1737" ...
##  $ BgRatio       : chr  "171/28928" "47/28928" "48/28928" "48/28928" ...
##  $ RichFactor    : num  0.462 0.787 0.771 0.771 0.221 ...
##  $ FoldEnrichment: num  7.69 13.11 12.84 12.84 3.68 ...
##  $ zScore        : num  22.2 21 20.7 20.7 14.5 ...
##  $ pvalue        : num  2.98e-50 1.27e-36 5.23e-36 5.23e-36 1.46e-30 ...
##  $ p.adjust      : num  1.77e-46 3.78e-33 7.78e-33 7.78e-33 1.74e-27 ...
##  $ qvalue        : num  1.17e-46 2.50e-33 5.14e-33 5.14e-33 1.15e-27 ...
##  $ geneID        : chr  "Gm6133/Rps6/Rpl13/Rpl18/Eif2s3y/Rpl17/Rpsa/Rps19/Rpl9/Rpl10a/Rpl24/Rpl28/Rpl18a/Rpl21/Rpl27a/Rps6-ps4/Rpl12/Rpl"| __truncated__ "Rpl10/Rpl13/Rpl17/Rpl9/Rpl10a/Rpl24/Rpl28/Rpl27a/Rpl12/Rpl38/Rps28/Rpl15/Rpl37a/Rpl37/Rpl8/Rps16/Rpl6/Rpl13a/Rp"| __truncated__ "Rpl10/Rpl13/Rpl17/Rpl9/Rpl10a/Rpl24/Rpl28/Rpl27a/Rpl12/Rpl38/Rps28/Rpl15/Rpl37a/Rpl37/Rpl8/Rps16/Rpl6/Rpl13a/Rp"| __truncated__ "Rpl10/Rpl13/Rpl17/Rpl9/Rpl10a/Rpl24/Rpl28/Rpl27a/Rpl12/Rpl38/Rps28/Rpl15/Rpl37a/Rpl37/Rpl8/Rps16/Rpl6/Rpl13a/Rp"| __truncated__ ...
##  $ Count         : int  79 37 37 37 100 92 54 66 77 88 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
 dotplot(pos_go) +
    ggtitle("") +
    theme_classic() + 
    theme(
        plot.title = element_text(hjust = 0.5),
        legend.position = "left",
        axis.text.y = element_text(hjust = 0, size = 10)) +
    scale_y_discrete(position = "right", 
                     labels = function(x) str_wrap(x, width = 25))  # Wrap y-axis labels to 2 lines
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.